/* Copyright 2014 Tobias Marschall
 * 
 * This file is part of CLEVER.
 * 
 * CLEVER is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * CLEVER is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with CLEVER.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <iostream>
#include <fstream>
#include <vector>
#include <cassert>

#include <boost/program_options.hpp>
#include <boost/unordered_map.hpp>
#include <boost/unordered_set.hpp>
// #include <boost/tokenizer.hpp>
// #include <boost/lexical_cast.hpp>
// #include <boost/dynamic_bitset.hpp>
// #include <boost/concept_check.hpp>

#include "Variation.h"
#include "VariationUtils.h"
#include "VariationListParser.h"
#include "VersionInfo.h"

using namespace std;
using namespace boost;
namespace po = boost::program_options;

/**
 * Prints the usage line, a one-sentence description of the tool and the list of
 * supported options to stderr, then terminates the process with exit status 1.
 *
 * @param name          Program name shown in the usage line.
 * @param options_desc  Option descriptions streamed after the description text.
 */
void usage(const char* name, const po::options_description& options_desc) {
	cerr << "Usage: " << name << " [options] [<variants-files>...]" << endl
	     << endl
	     << "Reads multiple lists of variations (e.g. as computed by laser-core) and merges them." << endl
	     << endl
	     << options_desc << endl;
	exit(1);
}

/** Hash map from a variation to its weight; read_variation_file() adds the weights of repeated occurrences of the same variation into one entry. */
typedef boost::unordered_map<Variation, double> variation_weight_map_t;
/** Hash set of variations; used for the optional filter set that restricts which variations are retained. */
typedef boost::unordered_set<Variation> variation_set_t;

typedef struct weighted_variation_t {
	Variation variation;
	double weight;
	weighted_variation_t(Variation v, double weight) : variation(v), weight(weight) {}
} weighted_variation_t;

typedef struct weighted_variation_pos_comparator_t {
	VariationUtils::variation_position_sort_t inner_comp;
	bool operator()(const weighted_variation_t& v1, const weighted_variation_t& v2) {
		return inner_comp(v1.variation, v2.variation);
	}
} weighted_variation_pos_comparator_t;

typedef struct weighted_variation_weight_comparator_t {
	VariationUtils::variation_position_sort_t inner_comp;
	bool operator()(const weighted_variation_t& v1, const weighted_variation_t& v2) {
		if (v1.weight != v2.weight) return v1.weight > v2.weight;
		return inner_comp(v1.variation, v2.variation);
	}
} weighted_variation_weight_comparator_t;

/**
 * Parses the variation list in the given file and merges it into the given map:
 * a variation not yet present is inserted with its weight, a variation already
 * present gets the weight added to its existing entry.
 *
 * @param map         Map from variation to accumulated weight; updated in place.
 * @param filename    Path of the file to read.
 * @param filter_set  If non-null, variations not contained in this set are skipped.
 * @param min_length  Forwarded to VariationListParser::parse (presumably the minimum
 *                    variation length to be parsed; see the parser for details).
 * @throws std::runtime_error if the file cannot be opened.
 *
 * A summary line with the number of parsed variations (before filtering) is written
 * to stderr.
 */
void read_variation_file(variation_weight_map_t& map, const string& filename, const variation_set_t* filter_set, int min_length) {
	ifstream variants_stream(filename.c_str());
	if (variants_stream.fail()) {
		ostringstream oss;
		oss << "Could not open \"" << filename << "\".";
		throw std::runtime_error(oss.str());
	}
	// the parser fills "weights" with one entry per returned variation
	vector<double> weights;
	auto_ptr<vector<Variation> > variations = VariationListParser::parse(variants_stream, false, min_length, &weights);
	assert(variations->size() == weights.size());
	// walk variations and weights in lockstep
	vector<double>::const_iterator weight_it = weights.begin();
	vector<Variation>::const_iterator var_it = variations->begin();
	for (; var_it != variations->end(); ++var_it, ++weight_it) {
		const bool rejected_by_filter = (filter_set != 0) && (filter_set->find(*var_it) == filter_set->end());
		if (rejected_by_filter) continue;
		// try to insert; if the variation is already known, add the weight to the existing entry
		std::pair<variation_weight_map_t::iterator, bool> slot = map.insert(variation_weight_map_t::value_type(*var_it, *weight_it));
		if (!slot.second) {
			slot.first->second += *weight_it;
		}
	}
	cerr << "Read " << variations->size() << " variations from file " << filename << endl;
}

/**
 * Program entry point.
 *
 * Parses the command line, optionally loads a filter set of variations, merges the
 * weighted variation lists of all input files (weights of identical variations are
 * summed up by read_variation_file) and prints the resulting variations to stdout,
 * one per line in the form "<variation> <weight>". Progress and error messages are
 * written to stderr.
 *
 * Only variations whose summed weight is at least --min_weight are printed. With
 * --all_from_filter every variation of the filter file is seeded with weight 0.0
 * and the weight cutoff is not applied.
 *
 * @return 0 on success; 1 if option parsing fails, if --all_from_filter is given
 *         without --filter, or if reading an input/filter file fails. If no input
 *         file is given, usage() is called, which exits with status 1.
 */
int main(int argc, char* argv[]) {
	VersionInfo::checkAndPrintVersion("filter-variations", cerr);
	string commandline = VersionInfo::commandline(argc, argv);

	// PARAMETERS (initial values mirror the option defaults declared below)
	double min_weight = 1.0;
	int min_length = 0;
	bool sort_by_weight = false;
	bool all_from_filter = false;
	string filter_filename = "";
	vector<string> input_files;
	
	po::options_description options_desc("Allowed options");
	options_desc.add_options()
		("min_weight,m", po::value<double>(&min_weight)->default_value(1.0), "Minimal weight to be printed (default=1).")
		("min_length,l", po::value<int>(&min_length)->default_value(0), "Minimum length.")
		("sort_by_weight,w", po::value<bool>(&sort_by_weight)->zero_tokens(), "Sort by weight (default: by position)")
		("filter,f", po::value<string>(&filter_filename), "Filter file; only retain deletions in the given file.")
		("all_from_filter,a", po::value<bool>(&all_from_filter)->zero_tokens(), "Output all variations in the given filter set.")
		("input_file,I", po::value<vector<string> >(&input_files), "Input file with variants (equivalent to specifying input files as positional arguments).")
	;
 
	// all positional arguments are treated as input files
	po::positional_options_description pos_options_desc;
	pos_options_desc.add("input_file", -1);

	po::variables_map options;
	try {
		po::store(po::command_line_parser(argc, argv).options(options_desc).positional(pos_options_desc).run(), options);
		po::notify(options);
	} catch(const std::exception& e) {
		cerr << "error: " << e.what() << "\n";
		return 1;
	}
	cerr << "Commandline: " << commandline << endl;

	if (input_files.empty()) {
		usage(argv[0], options_desc);
	}

	if (filter_filename.empty() && all_from_filter) {
		cerr << "Error: option --all_from_filter (-a) requires option --filter (-f)" << endl;
		return 1;
	}

	// The filter set is owned by this stack object, so it is released on every exit
	// path; filter_set stays null unless a filter file was given, which is how the
	// code below distinguishes "no filter" from "empty filter".
	variation_set_t filter_storage;
	const variation_set_t* filter_set = 0;
	variation_weight_map_t variation_weight_map;
	
	try {
		if (!filter_filename.empty()) {
			ifstream filter_stream(filter_filename.c_str());
			if (filter_stream.fail()) {
				ostringstream oss;
				oss << "Could not open \"" << filter_filename << "\".";
				throw std::runtime_error(oss.str());
			}
			auto_ptr<vector<Variation> > filter_list = VariationListParser::parse(filter_stream, false, min_length);
			filter_set = &filter_storage;
			vector<Variation>::const_iterator it = filter_list->begin();
			for (; it != filter_list->end(); ++it) {
				filter_storage.insert(*it);
				if (all_from_filter) {
					// seed with weight zero so that the variation is reported even if
					// it does not occur in any input file
					variation_weight_map.insert(make_pair(*it,0.0));
				}
			}
		}
		
		for (size_t i=0; i<input_files.size(); ++i) {
			read_variation_file(variation_weight_map, input_files[i], filter_set, min_length);
		}
	} catch(const std::exception& e) {
		cerr << "Error: " << e.what() << endl;
		return 1;
	}
	cerr << "Found " << variation_weight_map.size() << " different variations (with length>=" << min_length << ") in " << input_files.size() << " files." << endl;
	
	// select the variations to be printed
	vector<weighted_variation_t> variation_list;
	variation_weight_map_t::const_iterator map_it = variation_weight_map.begin();
	for (; map_it != variation_weight_map.end(); ++map_it) {
		if ((map_it->second >= min_weight) || (all_from_filter)) {
			variation_list.push_back(weighted_variation_t(map_it->first, map_it->second));
		}
	}
	cerr << "Weight cutoff (" << min_weight << ") " << (filter_set==0?"":"and filter constraints") << " met by " << variation_list.size() << " variations." << endl;
	cerr << "Sorting list of variations" << endl;
	if (sort_by_weight) {
		std::sort(variation_list.begin(), variation_list.end(), weighted_variation_weight_comparator_t());
	} else {
		std::sort(variation_list.begin(), variation_list.end(), weighted_variation_pos_comparator_t());
	}
	cerr << "Writing list of variations" << endl;
	vector<weighted_variation_t>::const_iterator list_it = variation_list.begin();
	for (; list_it != variation_list.end(); ++list_it) {
		// '\n' instead of endl: avoids flushing stdout after every single record
		cout << list_it->variation << ' ' << list_it->weight << '\n';
	}
	cout.flush();

	return 0;
}
